import numpy as np
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from scipy.stats import ttest_rel
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import plotly.offline as pyo
pyo.init_notebook_mode()
# Local imports
from ipynb.fs.defs.task3_1 import DatasetManager
from ipynb.fs.defs.task3_2 import ModelManager, plot_bar_data
# T-Test
def perform_t_test(model_1, model_2, X, y, model_type, cv=10, model_1_name="Model 1", model_2_name="Model 2"):
    """
    Perform a paired t-test on the cross-validation scores of two models.

    Parameters
    ----------
    model_1, model_2 : estimator
        scikit-learn compatible estimators to compare.
    X : array-like
        Feature matrix used for cross-validation.
    y : array-like
        Target vector.
    model_type : str
        "clf" scores the models by accuracy; "reg" scores them by
        negative mean squared error.
    cv : int, default 10
        Number of cross-validation folds.
    model_1_name, model_2_name : str
        Display names used in the bar-plot legend.

    Returns
    -------
    tuple
        (t_statistic, p_value, bar_plot_figure) where the figure shows
        the per-fold scores plus an "Average" bar for each model.

    Raises
    ------
    ValueError
        If model_type is neither "clf" nor "reg".
    """
    # Choose the scoring metric appropriate to the task type; previously
    # an unknown model_type fell through both branches and raised a
    # NameError further down, so fail fast with a clear message instead.
    if model_type == "clf":
        scoring = "accuracy"
    elif model_type == "reg":
        scoring = "neg_mean_squared_error"
    else:
        raise ValueError(f"model_type must be 'clf' or 'reg', got {model_type!r}")
    # Perform n-fold cross validation and obtain the per-fold scores
    model_1_scores = cross_val_score(model_1, X, y, cv=cv, scoring=scoring)
    model_2_scores = cross_val_score(model_2, X, y, cv=cv, scoring=scoring)
    # Mean of the fold scores, shown as an extra "Average" bar in the plot
    model_1_mean = np.mean(model_1_scores)
    model_2_mean = np.mean(model_2_scores)
    # Paired t-test: the folds act as the paired observations
    t_stat, p_val = ttest_rel(model_1_scores, model_2_scores)
    # Build visualisation of cross-validation scores
    x = [f"Fold {i+1}" for i in range(model_1_scores.size)] + ["Average"]
    mean_bar_plot = plot_bar_data(
        (model_1_name, list(model_1_scores) + [model_1_mean]),
        (model_2_name, list(model_2_scores) + [model_2_mean]),
        x=x,
        title="Cross Validation Scores",
        x_label="Folds",
        y_label="Accuracy" if model_type == "clf" else "NMSE",
    )
    return t_stat, p_val, mean_bar_plot
# ModelManager class is modified to accommodate new classification and regression models
class ModelManager2(ModelManager):
    """
    ModelManager subclass that swaps the SVM estimators (SVC/SVR) used
    in Task3-2 for multi-layer perceptrons (MLPClassifier/MLPRegressor).
    """

    def __init__(self, feature_set, targets):
        # No extra state of its own; delegate entirely to the parent.
        super().__init__(feature_set, targets)

    def train_model(self, model_type, cv_folds=10):
        """
        Modified version of the same function (from Task3-2) for
        training either a classification or regression model and
        optimising hyperparameters using cross validation. This
        version works with MLPClassifier and MLPRegressor instead
        of SVR and SVC.

        Parameters
        ----------
        model_type : str
            "clf" trains an MLPClassifier; "reg" trains an MLPRegressor.
        cv_folds : int, default 10
            Number of cross-validation folds used by the grid search.

        Raises
        ------
        ValueError
            If model_type is neither "clf" nor "reg".
        """
        # A train/test split must have been created via split_dataset().
        assert self._train_and_test_sets is not None, "You don't have your training and test sets."
        # Getting training and test data
        X_train = self._train_and_test_sets.get("X_train")
        y_train = self._train_and_test_sets.get("y_train")
        X_test = self._train_and_test_sets.get("X_test")
        y_test = self._train_and_test_sets.get("y_test")
        # Initialise model; fail fast on an unknown model_type (previously
        # this fell through and raised a NameError on `estimator`).
        if model_type == "clf":
            estimator = MLPClassifier(max_iter=2000)
            print("Classifier model initialised...")
        elif model_type == "reg":
            estimator = MLPRegressor(max_iter=2000)
            print("Regression model initialised...")
        else:
            raise ValueError(f"model_type must be 'clf' or 'reg', got {model_type!r}")
        # Specify hyperparameters ranges to be searched
        parameter_grid = [{
            "learning_rate": ["constant", "invscaling", "adaptive"],
            "alpha": [0.0001, 0.001, 0.01],
        }]
        # Model fitting, cross-validation and hyperparameter optimisation
        # using GridSearch; refit=True retrains on the whole training set
        # with the best hyperparameter combination found.
        model = GridSearchCV(
            estimator=estimator,
            param_grid=parameter_grid,
            cv=cv_folds,
            refit=True
        )
        print("Fitting model and performing cross-validation...")
        model.fit(X_train, y_train)
        print("Model fitting and cross-validation complete...")
        # Getting predictions
        print("Making predictions...")
        self._train_preds = model.predict(X_train)
        self._test_preds = model.predict(X_test)
        if model_type == "clf":
            # Class probabilities are only available for the classifier.
            self._train_preds_prob = model.predict_proba(X_train)
            self._test_preds_prob = model.predict_proba(X_test)
        # Assigning class variables
        self._trained_model = model
        self._best_hps = model.best_params_
# Productivity dataset; using optimal configuration as determined in Task3-1
# Drop columns 0-3, impute missing values with the iterative imputer, then
# build and scale a 7-feature set (presumably the optimal size from Task3-1
# — TODO confirm against that notebook).
gwp_dsm = DatasetManager("gwp_assessment")
gwp_dsm.load_and_preprocess([0,1,2,3], "iterative")
gwp_dsm.create_feature_set(7)
gwp_dsm.scale_feature_set()
# Star dataset; using optimal configuration as determined in Task3-1
# Drop the listed columns, impute with KNN, then build and scale an
# 8-feature set.
star_dsm = DatasetManager("star_assessment")
star_dsm.load_and_preprocess([0,1,8,9,12,16,17], "knn")
star_dsm.create_feature_set(8)
star_dsm.scale_feature_set()
Dataset loaded... Dataset cleaned.. Dataset encodings.. Dataset numerised... Missing values imputed... Dataset loaded... Dataset cleaned.. Dataset encodings.. Dataset numerised... Missing values imputed...
# Productivity dataset
# Scaled feature matrix plus the raw target, taken as the last column of
# the complete (preprocessed) dataset.
gwp_features = gwp_dsm.get_scaled_feat_ds()
gwp_targets = gwp_dsm.get_complete_ds()[:, -1]
# Star dataset
star_features = star_dsm.get_scaled_feat_ds()
star_targets = star_dsm.get_complete_ds()[:, -1]
# GWP dataset
# Model managers wrapping the MLP-based training workflow defined above.
gwp_mm = ModelManager2(gwp_features, gwp_targets)
# Star dataset
star_mm = ModelManager2(star_features, star_targets)
Methodology
Evaluation metrics
Productivity dataset: accuracy, precision, recall, F1 score. These metrics are ideal metrics for evaluating classification models as they provide comprehensive insight into a model's performance. Accuracy helps understand the overall effectiveness of the model. However, it can be misleading in imbalanced datasets, which is where precision and recall come in. They provide a more nuanced view of the model's ability to correctly identify positive instances and avoid false positives. The F1 score harmonises precision and recall, offering a single metric that seeks a balance between these two characteristics, making it especially useful when the costs of false positives and false negatives are significantly different.
Star dataset: mean squared error (MSE), mean absolute error (MAE), R2 score. These are robust metrics for evaluating regression models, with each illuminating different aspects of model performance. MSE emphasizes larger errors by squaring residuals, making it useful when larger errors are undesirable. MAE provides a more straightforward measure of average error magnitude, regardless of direction. The R2 score complements these by providing a relative measure of how much variance the model can explain, giving a broader picture of model performance beyond just raw error. These combined provide a comprehensive assessment of the model's effectiveness.
Notes
# Splitting productivity dataset
# Experiment 1: 80/20 train-test split.
gwp_mm.split_dataset(train_size=0.8, test_size=0.2)
# Splitting star dataset
# Fractions are tiny (1.6%/0.4%) — presumably to keep training tractable
# on a very large dataset; TODO confirm against Task3-1.
star_mm.split_dataset(train_size=0.016, test_size=0.004)
# Productivity dataset
gwp_mm.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Star dataset
star_mm.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Productivity dataset
gwp_mm.visualise_results_reg()
# Star dataset
# Column 17's encodings map encoded class labels back to names for plots.
encodings = star_dsm.get_encodings()[17]
star_mm.visualise_results_clf(encodings)
Productivity dataset
Star dataset
# Splitting productivity dataset
# Experiment 2: 75/25 train-test split (star uses the equivalent tiny
# fractions 1.5%/0.5%).
gwp_mm.split_dataset(train_size=0.75, test_size=0.25)
# Splitting star dataset
star_mm.split_dataset(train_size=0.015, test_size=0.005)
# Productivity dataset
gwp_mm.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Star dataset
star_mm.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Productivity dataset
gwp_mm.visualise_results_reg()
# Star dataset
encodings = star_dsm.get_encodings()[17]
star_mm.visualise_results_clf(encodings)
Productivity dataset
Star dataset
# Splitting productivity dataset
# Experiment 3: 70/30 train-test split (star: 1.4%/0.6%).
gwp_mm.split_dataset(train_size=0.7, test_size=0.3)
# Splitting star dataset
star_mm.split_dataset(train_size=0.014, test_size=0.006)
# Productivity dataset
gwp_mm.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Star dataset
star_mm.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Productivity dataset
gwp_mm.visualise_results_reg()
# Star dataset
encodings = star_dsm.get_encodings()[17]
star_mm.visualise_results_clf(encodings)
Productivity dataset
Star dataset
# Splitting productivity dataset
# Experiment 4: 60/40 train-test split (star: 1.2%/0.8%).
gwp_mm.split_dataset(train_size=0.6, test_size=0.4)
# Splitting star dataset
star_mm.split_dataset(train_size=0.012, test_size=0.008)
# Productivity dataset
gwp_mm.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Star dataset
star_mm.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Productivity dataset
gwp_mm.visualise_results_reg()
# Star dataset
encodings = star_dsm.get_encodings()[17]
star_mm.visualise_results_clf(encodings)
Productivity dataset
Star dataset
# Splitting productivity dataset
# Experiment 5: 50/50 train-test split (star: 1%/1%).
gwp_mm.split_dataset(train_size=0.5, test_size=0.5)
# Splitting star dataset
star_mm.split_dataset(train_size=0.01, test_size=0.01)
# Productivity dataset
gwp_mm.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Star dataset
star_mm.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Productivity dataset
gwp_mm.visualise_results_reg()
# Star dataset
encodings = star_dsm.get_encodings()[17]
star_mm.visualise_results_clf(encodings)
Productivity dataset
Star dataset
Productivity dataset
Star dataset
# Initialising, training and optimising model from Task3-2 using optimal train-test split ratio
# GWP dataset
# SVM-based regressor (Task3-2's ModelManager) at its best split (70/30).
gwp_mm_old = ModelManager(gwp_features, gwp_targets)
gwp_mm_old.split_dataset(train_size=0.7, test_size=0.3)
gwp_mm_old.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Initialising, training and optimising model from Task3-2 using optimal train-test split ratio
# Star dataset
# SVM-based classifier at its best split (1.5%/0.5%).
star_mm_old = ModelManager(star_features, star_targets)
star_mm_old.split_dataset(train_size=0.015, test_size=0.005)
star_mm_old.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Initialising, training and optimising model from Task3-3 using optimal training-test split
# GWP dataset
# MLP-based regressor (ModelManager2) at its best split (80/20).
gwp_mm = ModelManager2(gwp_features, gwp_targets)
gwp_mm.split_dataset(train_size=0.8, test_size=0.2)
gwp_mm.train_model("reg", 10)
Regression model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Initialising, training and optimising model from Task3-2 using optimal train-test split
# Star dataset
# MLP-based classifier at its best split (1.6%/0.4%).
star_mm = ModelManager2(star_features, star_targets)
star_mm.split_dataset(train_size=0.016, test_size=0.004)
star_mm.train_model("clf", 10)
Classifier model initialised... Fitting model and performing cross-validation... Model fitting and cross-validation complete... Making predictions...
# Productivity dataset
# Sample the data used for the t-test's cross-validation. train_size=0.9999
# keeps essentially the whole productivity dataset (the split is used here
# only for shuffling/sampling); the unused test halves are discarded.
gwp_X, _, gwp_y, _ = train_test_split(
    gwp_features,
    gwp_targets,
    train_size=0.9999,
    test_size=None,
)
# Star dataset
# Only 2% of the star dataset is sampled — presumably to keep the repeated
# cross-validation tractable on a very large dataset; TODO confirm.
star_X, _, star_y, _ = train_test_split(
    star_features,
    star_targets,
    train_size=0.02,
    test_size=None,
)
# Get models
# Compare the Task3-2 SVM regressor against the Task3-3 MLP regressor.
svm_reg = gwp_mm_old.get_trained_model()
mlp_reg = gwp_mm.get_trained_model()
# Perform t-test
# Paired t-test over 5-fold cross-validation NMSE scores.
t_stat, p_val, cv_bar_plot = perform_t_test(
    svm_reg,
    mlp_reg,
    gwp_X,
    gwp_y,
    model_type="reg",
    cv=5,
    model_1_name="SVM Regressor",
    model_2_name="MLP Regressor"
)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_val}")
t-statistic: 2.090858362757806 p-value: 0.10472907300670688
# Visualise cross-validation scores
cv_bar_plot
# Get models
# Compare the Task3-2 SVM classifier against the Task3-3 MLP classifier.
svm_clf = star_mm_old.get_trained_model()
mlp_clf = star_mm.get_trained_model()
# Perform t-test
# Paired t-test over 5-fold cross-validation accuracy scores.
t_stat, p_val, cv_bar_plot = perform_t_test(
    svm_clf,
    mlp_clf,
    star_X,
    star_y,
    model_type="clf",
    cv=5,
    model_1_name="SVM Classifier",
    model_2_name="MLP Classifier"
)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_val}")
t-statistic: -0.4009104504102879 p-value: 0.7089783348997766
# Visualise cross-validation scores
cv_bar_plot
Productivity dataset
Star Dataset